***** Anomaly Detection using FbProphet *****

Loading required Libraries

In [1]:
import datetime
import pandas as pd
import requests
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import plotly.express as px
import numpy as np
from fbprophet import Prophet
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from fancyimpute import KNN
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import chart_studio.plotly as py
import matplotlib.pyplot as plt
from matplotlib import pyplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)

mpl.rcParams['figure.figsize'] = (10,8)
mpl.rcParams['axes.grid'] = False


print("Libraries imported succesfully")
Libraries imported succesfully

Loading the dataset from the given csv file

In [2]:
df_ads = pd.read_csv('ads_challenge.csv')
display(df_ads)
Date Country ad_type1_impressions ad_type1_CTR ad_type2_impressions ad_type2_videos_completed ad_type2_CTR
0 2019-01-02 Albania 23,962 1.47% 12,900 97.59% 1.37%
1 2019-01-02 Algeria 50,643 1.63% 35,458 97.77% 1.45%
2 2019-01-02 Argentina 760,871 0.65% 1,006,527 98.03% 0.47%
3 2019-01-02 Armenia 22,796 1.25% NaN NaN NaN
4 2019-01-02 Australia 407,314 0.58% 1,290,808 98.50% 0.48%
... ... ... ... ... ... ... ...
13212 2019-04-29 Macao NaN NaN 12,032 0.00% 0.66%
13213 2019-04-29 Malta NaN NaN 31,695 0.00% 0.72%
13214 2019-04-29 Palestine NaN NaN 33,792 0.00% 0.52%
13215 2019-04-29 Syria NaN NaN 14,035 0.00% 3.70%
13216 2019-04-29 Uzbekistan NaN NaN 13,698 0.00% 2.06%

13217 rows × 7 columns

In [3]:
# showing data types of the features
df_ads.dtypes
Out[3]:
Date                         object
Country                      object
ad_type1_impressions         object
ad_type1_CTR                 object
ad_type2_impressions         object
ad_type2_videos_completed    object
ad_type2_CTR                 object
dtype: object
In [4]:
# converting data datatype to datetime
df_ads.Date = pd.to_datetime(df_ads['Date'])
df_ads.dtypes
Out[4]:
Date                         datetime64[ns]
Country                              object
ad_type1_impressions                 object
ad_type1_CTR                         object
ad_type2_impressions                 object
ad_type2_videos_completed            object
ad_type2_CTR                         object
dtype: object

Exploratory Data Analysis

In [5]:
# showing information of the data type, You can also use df.summary() for the detailed summary.
df_ads.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13217 entries, 0 to 13216
Data columns (total 7 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Date                       13217 non-null  datetime64[ns]
 1   Country                    13202 non-null  object        
 2   ad_type1_impressions       5364 non-null   object        
 3   ad_type1_CTR               5364 non-null   object        
 4   ad_type2_impressions       13035 non-null  object        
 5   ad_type2_videos_completed  13035 non-null  object        
 6   ad_type2_CTR               13035 non-null  object        
dtypes: datetime64[ns](1), object(6)
memory usage: 722.9+ KB
In [6]:
# Shape of the dataset
df_ads.shape
Out[6]:
(13217, 7)
In [7]:
# Showing Missing Values
df_ads.isnull().sum()/len(df_ads)*100
Out[7]:
Date                          0.000000
Country                       0.113490
ad_type1_impressions         59.415904
ad_type1_CTR                 59.415904
ad_type2_impressions          1.377014
ad_type2_videos_completed     1.377014
ad_type2_CTR                  1.377014
dtype: float64
In [8]:
# converting object data type to float
df_ads['ad_type1_impressions'] = df_ads['ad_type1_impressions'].str.replace(',','').astype('float')
df_ads['ad_type2_impressions'] = df_ads['ad_type2_impressions'].str.replace(',','').astype('float')
df_ads['ad_type1_CTR'] = df_ads['ad_type1_CTR'].str.replace('%','').astype('float') * 10**(-2)
df_ads['ad_type2_CTR'] = df_ads['ad_type2_CTR'].str.replace('%','').astype('float')* 10**(-2)
df_ads['ad_type2_videos_completed'] = df_ads['ad_type2_videos_completed'].str.replace('%','').astype('float')* 10**(-2)
df_ads.head()
Out[8]:
Date Country ad_type1_impressions ad_type1_CTR ad_type2_impressions ad_type2_videos_completed ad_type2_CTR
0 2019-01-02 Albania 23962.0 0.0147 12900.0 0.9759 0.0137
1 2019-01-02 Algeria 50643.0 0.0163 35458.0 0.9777 0.0145
2 2019-01-02 Argentina 760871.0 0.0065 1006527.0 0.9803 0.0047
3 2019-01-02 Armenia 22796.0 0.0125 NaN NaN NaN
4 2019-01-02 Australia 407314.0 0.0058 1290808.0 0.9850 0.0048

Creating a dataframe with the important features selected. For now I am selecting impressions as well as type 2 videos completed. We can omit them as their importance is not significant but this decision can be taken based on doman expertise knowledge to improve the performance further. For now I am keeping these variables.

In [9]:
ctr_video = df_ads[['Date','Country','ad_type2_impressions','ad_type2_videos_completed','ad_type1_CTR','ad_type2_CTR']]
In [10]:
# Daily CTR Trend of United States 
fig = px.line(df_ads[df_ads.Country=="United States"].reset_index(), x = 'Date',y = ['ad_type1_CTR','ad_type1_impressions'],title = 'Type-1 CTR')

fig.update_xaxes(
    rangeslider_visible= True,
    rangeselector=dict(
                        buttons = list([
                        dict(count = 1,label = '1m',step='month',stepmode = "backward"),
                        dict(count = 2,label = '6m',step='month',stepmode = "backward"),
                        dict(count = 3,label = '12m',step='month',stepmode = "todate"),
                        dict(step= 'all')
                            ])        
                        )
                   )
fig.show()

As you can see that the data is entirely missing from Jan 9 till Feb 14th for both impressions and CTR for ads group-1 There can be multiple reasons for it

1) Data was not gathered/collected or measured due to some technical issue.

2) There was no data - no ads were shown during that period.

In order to solve the above cases , I am making as assumption that due to covid start - no ads was shown during that period for group 1 therefore we don't have datapoints avaialble for that period of time. There are multiple ways to deal with it .for the sake of simplicity for now, I am replacing these values with 0.

In [11]:
# Replacing null value with 0
ctr_video['ad_type1_CTR'] = ctr_video['ad_type1_CTR'].fillna(0)
C:\Users\FX253KA\Anaconda3\envs\3.7_env\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [12]:
# Deleting(dropping) rows where impressions and videos completed are null and CTR is null
ctr_video.dropna(subset=['Date','ad_type2_impressions','ad_type2_videos_completed','ad_type2_CTR'], inplace = True, how = 'all')
ctr_video[ctr_video.ad_type2_CTR.isna()]
C:\Users\FX253KA\Anaconda3\envs\3.7_env\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[12]:
Date Country ad_type2_impressions ad_type2_videos_completed ad_type1_CTR ad_type2_CTR
3 2019-01-02 Armenia NaN NaN 0.0125 NaN
42 2019-01-02 Kosovo NaN NaN 0.0134 NaN
44 2019-01-02 Laos NaN NaN 0.0181 NaN
50 2019-01-02 Macedonia NaN NaN 0.0079 NaN
53 2019-01-02 Mongolia NaN NaN 0.0209 NaN
... ... ... ... ... ... ...
13117 2019-04-29 Bosnia and Herzegovina NaN NaN 0.0065 NaN
13121 2019-04-29 Cameroon NaN NaN 0.0196 NaN
13158 2019-04-29 Laos NaN NaN 0.0153 NaN
13162 2019-04-29 Macedonia NaN NaN 0.0097 NaN
13166 2019-04-29 Mongolia NaN NaN 0.0161 NaN

182 rows × 6 columns

In [13]:
# setting Date as the Index
ctr_video = ctr_video.set_index('Date')

Imputing Missing Values using KNN Imputer

In [14]:
# Finding numerical and categorical features
num_features = ctr_video.select_dtypes(include=['int64', 'float64']).columns
cat_features = ctr_video.select_dtypes(include=['object','category']).columns

# creating a dictionary for ordinal encoding -- > Doing ordinal encoding before KNN for object datatype
ordinal_enc_dict = {}

for col_name in ctr_video[cat_features]:
    # Create Ordinal encoder for col
    ordinal_enc_dict[col_name] = OrdinalEncoder()
    col = ctr_video[col_name]
    
    # Select non-null values of col
    col_not_null = col[col.notnull()]
    reshaped_vals = col_not_null.values.reshape(-1, 1)
    encoded_vals = ordinal_enc_dict[col_name].fit_transform(reshaped_vals)
    
    # Store the values to non-null values of the column
    ctr_video.loc[col.notnull(), col_name] = np.squeeze(encoded_vals)
# KNN imputer for imputin missing values using KNN
KNN_imputer = KNN()

# Impute the DataFrame
ctr_video.iloc[:, :] = KNN_imputer.fit_transform(ctr_video)
ctr_video.head()
Imputing row 1/13217 with 0 missing, elapsed time: 16.257
Imputing row 101/13217 with 0 missing, elapsed time: 16.258
Imputing row 201/13217 with 0 missing, elapsed time: 16.259
Imputing row 301/13217 with 0 missing, elapsed time: 16.260
Imputing row 401/13217 with 0 missing, elapsed time: 16.261
Imputing row 501/13217 with 3 missing, elapsed time: 16.262
Imputing row 601/13217 with 0 missing, elapsed time: 16.263
Imputing row 701/13217 with 0 missing, elapsed time: 16.263
Imputing row 801/13217 with 0 missing, elapsed time: 16.264
Imputing row 901/13217 with 0 missing, elapsed time: 16.265
Imputing row 1001/13217 with 0 missing, elapsed time: 16.265
Imputing row 1101/13217 with 0 missing, elapsed time: 16.265
Imputing row 1201/13217 with 0 missing, elapsed time: 16.265
Imputing row 1301/13217 with 0 missing, elapsed time: 16.265
Imputing row 1401/13217 with 0 missing, elapsed time: 16.266
Imputing row 1501/13217 with 0 missing, elapsed time: 16.266
Imputing row 1601/13217 with 0 missing, elapsed time: 16.266
Imputing row 1701/13217 with 0 missing, elapsed time: 16.266
Imputing row 1801/13217 with 0 missing, elapsed time: 16.266
Imputing row 1901/13217 with 0 missing, elapsed time: 16.267
Imputing row 2001/13217 with 0 missing, elapsed time: 16.267
Imputing row 2101/13217 with 0 missing, elapsed time: 16.267
Imputing row 2201/13217 with 0 missing, elapsed time: 16.267
Imputing row 2301/13217 with 0 missing, elapsed time: 16.267
Imputing row 2401/13217 with 0 missing, elapsed time: 16.268
Imputing row 2501/13217 with 0 missing, elapsed time: 16.268
Imputing row 2601/13217 with 0 missing, elapsed time: 16.268
Imputing row 2701/13217 with 0 missing, elapsed time: 16.268
Imputing row 2801/13217 with 0 missing, elapsed time: 16.268
Imputing row 2901/13217 with 0 missing, elapsed time: 16.269
Imputing row 3001/13217 with 0 missing, elapsed time: 16.269
Imputing row 3101/13217 with 0 missing, elapsed time: 16.269
Imputing row 3201/13217 with 0 missing, elapsed time: 16.269
Imputing row 3301/13217 with 0 missing, elapsed time: 16.269
Imputing row 3401/13217 with 0 missing, elapsed time: 16.270
Imputing row 3501/13217 with 0 missing, elapsed time: 16.270
Imputing row 3601/13217 with 0 missing, elapsed time: 16.270
Imputing row 3701/13217 with 0 missing, elapsed time: 16.270
Imputing row 3801/13217 with 0 missing, elapsed time: 16.270
Imputing row 3901/13217 with 0 missing, elapsed time: 16.272
Imputing row 4001/13217 with 0 missing, elapsed time: 16.272
Imputing row 4101/13217 with 0 missing, elapsed time: 16.273
Imputing row 4201/13217 with 0 missing, elapsed time: 16.273
Imputing row 4301/13217 with 0 missing, elapsed time: 16.273
Imputing row 4401/13217 with 0 missing, elapsed time: 16.274
Imputing row 4501/13217 with 0 missing, elapsed time: 16.274
Imputing row 4601/13217 with 0 missing, elapsed time: 16.275
Imputing row 4701/13217 with 0 missing, elapsed time: 16.275
Imputing row 4801/13217 with 0 missing, elapsed time: 16.275
Imputing row 4901/13217 with 0 missing, elapsed time: 16.276
Imputing row 5001/13217 with 0 missing, elapsed time: 16.276
Imputing row 5101/13217 with 0 missing, elapsed time: 16.276
Imputing row 5201/13217 with 0 missing, elapsed time: 16.276
Imputing row 5301/13217 with 0 missing, elapsed time: 16.277
Imputing row 5401/13217 with 0 missing, elapsed time: 16.277
Imputing row 5501/13217 with 0 missing, elapsed time: 16.278
Imputing row 5601/13217 with 0 missing, elapsed time: 16.278
Imputing row 5701/13217 with 0 missing, elapsed time: 16.279
Imputing row 5801/13217 with 0 missing, elapsed time: 16.279
Imputing row 5901/13217 with 0 missing, elapsed time: 16.279
Imputing row 6001/13217 with 0 missing, elapsed time: 16.280
Imputing row 6101/13217 with 0 missing, elapsed time: 16.280
Imputing row 6201/13217 with 0 missing, elapsed time: 16.280
Imputing row 6301/13217 with 0 missing, elapsed time: 16.281
Imputing row 6401/13217 with 0 missing, elapsed time: 16.281
Imputing row 6501/13217 with 0 missing, elapsed time: 16.282
Imputing row 6601/13217 with 0 missing, elapsed time: 16.282
Imputing row 6701/13217 with 0 missing, elapsed time: 16.282
Imputing row 6801/13217 with 0 missing, elapsed time: 16.282
Imputing row 6901/13217 with 0 missing, elapsed time: 16.283
Imputing row 7001/13217 with 0 missing, elapsed time: 16.283
Imputing row 7101/13217 with 0 missing, elapsed time: 16.283
Imputing row 7201/13217 with 0 missing, elapsed time: 16.284
Imputing row 7301/13217 with 0 missing, elapsed time: 16.284
Imputing row 7401/13217 with 0 missing, elapsed time: 16.284
Imputing row 7501/13217 with 0 missing, elapsed time: 16.285
Imputing row 7601/13217 with 0 missing, elapsed time: 16.285
Imputing row 7701/13217 with 0 missing, elapsed time: 16.285
Imputing row 7801/13217 with 0 missing, elapsed time: 16.286
Imputing row 7901/13217 with 0 missing, elapsed time: 16.286
Imputing row 8001/13217 with 0 missing, elapsed time: 16.286
Imputing row 8101/13217 with 0 missing, elapsed time: 16.287
Imputing row 8201/13217 with 0 missing, elapsed time: 16.287
Imputing row 8301/13217 with 0 missing, elapsed time: 16.287
Imputing row 8401/13217 with 0 missing, elapsed time: 16.288
Imputing row 8501/13217 with 0 missing, elapsed time: 16.288
Imputing row 8601/13217 with 0 missing, elapsed time: 16.288
Imputing row 8701/13217 with 0 missing, elapsed time: 16.289
Imputing row 8801/13217 with 0 missing, elapsed time: 16.289
Imputing row 8901/13217 with 0 missing, elapsed time: 16.289
Imputing row 9001/13217 with 0 missing, elapsed time: 16.290
Imputing row 9101/13217 with 0 missing, elapsed time: 16.290
Imputing row 9201/13217 with 0 missing, elapsed time: 16.290
Imputing row 9301/13217 with 0 missing, elapsed time: 16.290
Imputing row 9401/13217 with 0 missing, elapsed time: 16.291
Imputing row 9501/13217 with 0 missing, elapsed time: 16.291
Imputing row 9601/13217 with 0 missing, elapsed time: 16.291
Imputing row 9701/13217 with 0 missing, elapsed time: 16.292
Imputing row 9801/13217 with 0 missing, elapsed time: 16.292
Imputing row 9901/13217 with 0 missing, elapsed time: 16.292
Imputing row 10001/13217 with 0 missing, elapsed time: 16.293
Imputing row 10101/13217 with 0 missing, elapsed time: 16.293
Imputing row 10201/13217 with 0 missing, elapsed time: 16.293
Imputing row 10301/13217 with 0 missing, elapsed time: 16.294
Imputing row 10401/13217 with 0 missing, elapsed time: 16.294
Imputing row 10501/13217 with 0 missing, elapsed time: 16.294
Imputing row 10601/13217 with 0 missing, elapsed time: 16.294
Imputing row 10701/13217 with 0 missing, elapsed time: 16.295
Imputing row 10801/13217 with 0 missing, elapsed time: 16.295
Imputing row 10901/13217 with 0 missing, elapsed time: 16.296
Imputing row 11001/13217 with 0 missing, elapsed time: 16.296
Imputing row 11101/13217 with 0 missing, elapsed time: 16.296
Imputing row 11201/13217 with 0 missing, elapsed time: 16.297
Imputing row 11301/13217 with 0 missing, elapsed time: 16.298
Imputing row 11401/13217 with 0 missing, elapsed time: 16.299
Imputing row 11501/13217 with 0 missing, elapsed time: 16.299
Imputing row 11601/13217 with 0 missing, elapsed time: 16.300
Imputing row 11701/13217 with 3 missing, elapsed time: 16.301
Imputing row 11801/13217 with 0 missing, elapsed time: 16.303
Imputing row 11901/13217 with 0 missing, elapsed time: 16.304
Imputing row 12001/13217 with 0 missing, elapsed time: 16.305
Imputing row 12101/13217 with 0 missing, elapsed time: 16.306
Imputing row 12201/13217 with 0 missing, elapsed time: 16.308
Imputing row 12301/13217 with 0 missing, elapsed time: 16.310
Imputing row 12401/13217 with 0 missing, elapsed time: 16.311
Imputing row 12501/13217 with 0 missing, elapsed time: 16.312
Imputing row 12601/13217 with 0 missing, elapsed time: 16.314
Imputing row 12701/13217 with 0 missing, elapsed time: 16.315
Imputing row 12801/13217 with 0 missing, elapsed time: 16.316
Imputing row 12901/13217 with 0 missing, elapsed time: 16.317
Imputing row 13001/13217 with 0 missing, elapsed time: 16.318
Imputing row 13101/13217 with 0 missing, elapsed time: 16.319
Imputing row 13201/13217 with 0 missing, elapsed time: 16.321
Out[14]:
Country ad_type2_impressions ad_type2_videos_completed ad_type1_CTR ad_type2_CTR
Date
2019-01-02 0.0 12900.0 0.97590 0.0147 0.01370
2019-01-02 1.0 35458.0 0.97770 0.0163 0.01450
2019-01-02 2.0 1006527.0 0.98030 0.0065 0.00470
2019-01-02 3.0 25316.6 0.78624 0.0125 0.01278
2019-01-02 5.0 1290808.0 0.98500 0.0058 0.00480
In [15]:
ctr_video_scaled = ctr_video.copy()

Scaling the data using Min Max scaler

In [16]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

scaler = MinMaxScaler()
numeric_features = ['ad_type2_impressions','ad_type2_videos_completed','ad_type1_CTR','ad_type2_CTR']
# numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
# df2 = pd.DataFrame(scaler.fit_transform(df),
#                    columns=['ad_type1_impressions','ad_type1_CTR','ad_type2_impressions','ad_type2_videos_completed','ad_type2_CTR'],
#                    index = ['Date'])
ctr_video_scaled[numeric_features] = scaler.fit_transform(ctr_video_scaled[numeric_features])
ctr_video_scaled.head()
Out[16]:
Country ad_type2_impressions ad_type2_videos_completed ad_type1_CTR ad_type2_CTR
Date
2019-01-02 0.0 0.000033 0.927133 0.155063 0.150901
2019-01-02 1.0 0.000863 0.928843 0.171941 0.159910
2019-01-02 2.0 0.036617 0.931313 0.068565 0.049550
2019-01-02 3.0 0.000490 0.746950 0.131857 0.140541
2019-01-02 5.0 0.047084 0.935778 0.061181 0.050676
In [17]:
# Loop over the column names
for col_name in cat_features:
    
    # Reshape the data
    reshaped = ctr_video_scaled[col_name].values.reshape(-1, 1)
    # Perform inverse transform of the ordinally encoded columns
    ctr_video_scaled[col_name] = ordinal_enc_dict[col_name].inverse_transform(reshaped)
ctr_video_scaled.head()
Out[17]:
Country ad_type2_impressions ad_type2_videos_completed ad_type1_CTR ad_type2_CTR
Date
2019-01-02 Albania 0.000033 0.927133 0.155063 0.150901
2019-01-02 Algeria 0.000863 0.928843 0.171941 0.159910
2019-01-02 Argentina 0.036617 0.931313 0.068565 0.049550
2019-01-02 Armenia 0.000490 0.746950 0.131857 0.140541
2019-01-02 Australia 0.047084 0.935778 0.061181 0.050676
In [18]:
# Loop over the column names
for col_name in cat_features:
    
    # Reshape the data
    reshaped = ctr_video[col_name].values.reshape(-1, 1)
    # Perform inverse transform of the ordinally encoded columns
    ctr_video[col_name] = ordinal_enc_dict[col_name].inverse_transform(reshaped)
ctr_video.head()
Out[18]:
Country ad_type2_impressions ad_type2_videos_completed ad_type1_CTR ad_type2_CTR
Date
2019-01-02 Albania 12900.0 0.97590 0.0147 0.01370
2019-01-02 Algeria 35458.0 0.97770 0.0163 0.01450
2019-01-02 Argentina 1006527.0 0.98030 0.0065 0.00470
2019-01-02 Armenia 25316.6 0.78624 0.0125 0.01278
2019-01-02 Australia 1290808.0 0.98500 0.0058 0.00480

Visualizing the trend of United states for Impressions, videos completed and CTR

In [19]:
fig = px.line(ctr_video_scaled[ctr_video_scaled.Country=="United States"].reset_index(), x = 'Date',y = ['ad_type2_impressions','ad_type2_CTR'],title = 'Impressions Vs Click Through Rate')

fig.update_xaxes(
    rangeslider_visible= True,
    rangeselector=dict(
                        buttons = list([
                        dict(count = 1,label = '1m',step='month',stepmode = "backward"),
                        dict(count = 2,label = '6m',step='month',stepmode = "backward"),
                        dict(count = 3,label = '12m',step='month',stepmode = "todate"),
                        dict(step= 'all')
                            ])        
                        )
                   )
fig.show()
In [20]:
fig = px.line(ctr_video_scaled[ctr_video_scaled.Country=="United States"].reset_index(), x = 'Date',y = ['ad_type2_impressions','ad_type2_CTR'],title = 'Impressions Vs Click Through Rate')

fig.update_xaxes(
    rangeslider_visible= True,
    rangeselector=dict(
                        buttons = list([
                        dict(count = 1,label = '1m',step='month',stepmode = "backward"),
                        dict(count = 2,label = '6m',step='month',stepmode = "backward"),
                        dict(count = 3,label = '12m',step='month',stepmode = "todate"),
                        dict(step= 'all')
                            ])        
                        )
                   )
fig.show()
In [21]:
fig = px.line(ctr_video_scaled[ctr_video_scaled.Country=="India"].reset_index(), x = 'Date',y = ['ad_type2_CTR','ad_type2_impressions'],title = 'Impressions Vs Click Through Rate')

fig.update_xaxes(
    rangeslider_visible= True,
    rangeselector=dict(
                        buttons = list([
                        dict(count = 1,label = '1m',step='month',stepmode = "backward"),
                        dict(count = 2,label = '6m',step='month',stepmode = "backward"),
                        dict(count = 3,label = '12m',step='month',stepmode = "todate"),
                        dict(step= 'all')
                            ])        
                        )
                   )
fig.show()
In [22]:
ctr_video_scaled.query("Country=='United States'")[['ad_type2_impressions','ad_type2_CTR','ad_type2_videos_completed']].plot()
Out[22]:
<AxesSubplot:xlabel='Date'>
In [23]:
# Unique country names list
ctr_video['Country'].unique()
Out[23]:
array(['Albania', 'Algeria', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belgium',
       'Bolivia', 'Bosnia and Herzegovina', 'Brazil', 'Bulgaria',
       'Cambodia', 'Canada', 'Chile', 'Colombia', 'Costa Rica', 'Cyprus',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Estonia',
       'France', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala',
       'Honduras', 'Hong Kong', 'India', 'Indonesia', 'Iraq', 'Ireland',
       'Israel', 'Italy', 'Jamaica', 'Jordan', 'Kenya', 'Kosovo',
       'Kuwait', 'Laos', 'Latvia', 'Lebanon', 'Lithuania', 'Luxembourg',
       'Macao', 'Macedonia', 'Malaysia', 'Mexico', 'Mongolia', 'Morocco',
       'Myanmar (Burma)', 'Nepal', 'Netherlands', 'New Zealand',
       'Nicaragua', 'Nigeria', 'Oman', 'Pakistan', 'Palestine', 'Panama',
       'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal',
       'Puerto Rico', 'Qatar', 'Romania', 'Saudi Arabia', 'Serbia',
       'Singapore', 'Slovenia', 'South Africa', 'Spain', 'Sri Lanka',
       'Sweden', 'Switzerland', 'Taiwan', 'Thailand',
       'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Ukraine',
       'United Arab Emirates', 'United Kingdom', 'United States',
       'Uruguay', 'Venezuela', 'Vietnam', 'China', 'Croatia',
       'Czech Republic', 'Denmark', 'Finland', 'Hungary', 'Iceland',
       'Japan', 'Korea, South', 'Malta', 'Norway', 'Russia', 'Slovakia',
       'Syria', 'Tanzania', 'Unknown', 'Kazakhstan', 'Moldova', 'Reunion',
       'Zambia', 'Belarus', 'Guadeloupe', 'Martinique', 'Namibia',
       "Cote d'Ivoire", 'Mauritius', 'Cameroon', 'Senegal',
       'Bahamas, The', 'Mozambique', 'Zimbabwe', 'Uganda', 'Uzbekistan',
       'Aruba', 'Guam'], dtype=object)
In [24]:
# scaler.inverse_transform(ctr_video)
ctr_video.head()
Out[24]:
Country ad_type2_impressions ad_type2_videos_completed ad_type1_CTR ad_type2_CTR
Date
2019-01-02 Albania 12900.0 0.97590 0.0147 0.01370
2019-01-02 Algeria 35458.0 0.97770 0.0163 0.01450
2019-01-02 Argentina 1006527.0 0.98030 0.0065 0.00470
2019-01-02 Armenia 25316.6 0.78624 0.0125 0.01278
2019-01-02 Australia 1290808.0 0.98500 0.0058 0.00480

Preparing Data for FbProphet model

In [25]:
# Renaming target column as 'y'
ctr_video.rename(columns={'ad_type2_CTR': 'y'},inplace = True)
In [26]:
# Renaming data index to 'ds'
ctr_video.index.names = ['ds']
In [27]:
# Since Namibia and Guam and has less than 3 training values so it is not possible to model them using fbpropeht - 
# As this algorithm requires minimum two datapoints to be trained
# Grouping the data at Country level so that we can run model for each country individually
country_vid_ctr = ctr_video[(ctr_video.Country != 'Namibia') & (ctr_video.Country != 'Guam')].reset_index().groupby('Country')
country_vid_ctr.head()
Out[27]:
ds Country ad_type2_impressions ad_type2_videos_completed ad_type1_CTR y
0 2019-01-02 Albania 12900.0 0.97590 0.0147 0.01370
1 2019-01-02 Algeria 35458.0 0.97770 0.0163 0.01450
2 2019-01-02 Argentina 1006527.0 0.98030 0.0065 0.00470
3 2019-01-02 Armenia 25316.6 0.78624 0.0125 0.01278
4 2019-01-02 Australia 1290808.0 0.98500 0.0058 0.00480
... ... ... ... ... ... ...
11779 2019-04-16 Aruba 12856.0 0.00000 0.0000 0.00580
11890 2019-04-17 Aruba 12606.0 0.00000 0.0000 0.00670
11998 2019-04-18 Aruba 12386.0 0.00000 0.0000 0.00770
12105 2019-04-19 Aruba 12794.0 1.00000 0.0000 0.00520
12212 2019-04-20 Aruba 12630.0 0.00000 0.0000 0.00470

633 rows × 6 columns

In [28]:
# Displaying data
country_vid_ctr.head()
Out[28]:
ds Country ad_type2_impressions ad_type2_videos_completed ad_type1_CTR y
0 2019-01-02 Albania 12900.0 0.97590 0.0147 0.01370
1 2019-01-02 Algeria 35458.0 0.97770 0.0163 0.01450
2 2019-01-02 Argentina 1006527.0 0.98030 0.0065 0.00470
3 2019-01-02 Armenia 25316.6 0.78624 0.0125 0.01278
4 2019-01-02 Australia 1290808.0 0.98500 0.0058 0.00480
... ... ... ... ... ... ...
11779 2019-04-16 Aruba 12856.0 0.00000 0.0000 0.00580
11890 2019-04-17 Aruba 12606.0 0.00000 0.0000 0.00670
11998 2019-04-18 Aruba 12386.0 0.00000 0.0000 0.00770
12105 2019-04-19 Aruba 12794.0 1.00000 0.0000 0.00520
12212 2019-04-20 Aruba 12630.0 0.00000 0.0000 0.00470

633 rows × 6 columns

Model Development - fbProphet

In [29]:
# Building model

target = pd.DataFrame()

# Running the loop for each country and creating an individual series and running the model accordingly
for country in country_vid_ctr.groups:
    group = country_vid_ctr.get_group(country)
    # Defining model
    model = Prophet(interval_width=0.95)   
    # Adding Extra Regressor in the model
    model.add_regressor('ad_type2_impressions') 
    model.add_regressor('ad_type2_videos_completed') 
    model.add_regressor('ad_type1_CTR') 
    # Dividing the model in Training and Testing
    train_size = int(len(group) *0.8)
    train = group[:train_size]
    test_size = int(len(group)) - train_size
    # Fitting the model
    model.fit(train)
    # Creating Future Dataframe
    future = model.make_future_dataframe(periods=test_size)
    future['ad_type2_impressions']=list(group["ad_type2_impressions"])
    future['ad_type2_videos_completed']=list(group["ad_type2_videos_completed"])
    future['ad_type1_CTR']=list(group["ad_type1_CTR"])
    # Predicting future Dataframe
    forecast = model.predict(future)    
    # Plotting the result
    model.plot(forecast)
    # Renaming column names based on their Country
    forecast = forecast.rename(columns={'yhat':'yhat_'+country, 'yhat_lower': 'yhat_lower_'+country, 'yhat_upper': 'yhat_upper_'+country})
    forecast['y_'+country]=list(group["y"])
    # Calculating Error in predicted value by subtarcting the prediction fromt he original
    forecast['error_'+country] = forecast['y_'+country] - forecast['yhat_'+country]
    # Calculating the uncertainity in the model by subtracting lower bound from the Upper Bound ( Confidence Interval)
    forecast['uncertainity_'+country] = forecast['yhat_upper_'+country] - forecast['yhat_lower_'+country]
    # Finding anomalies based on uncertainity - categorizing values which are 1.5 times of uncertainity as an anomaly
    forecast['anomaly_'+country] = forecast.apply(lambda x: 'Yes' if (np.abs(x['error_'+country]) > 1.5*x['uncertainity_'+country]) else 'No', axis=1)
    # Saving the results in the target dataframe
    target = pd.merge(target, forecast[['yhat_'+country,'yhat_lower_'+country,'yhat_upper_'+country,'y_'+country,'ds','error_'+country, 'uncertainity_'+country, 'anomaly_'+country]].set_index('ds'), how='outer',
                    left_index=True, right_index=True)
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 2.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 9.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 17.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
C:\Users\FX253KA\Anaconda3\envs\3.7_env\lib\site-packages\fbprophet\plot.py:66: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 21.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 10.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 0.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 4.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 11.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 15.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 6.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 8.
In [30]:
display(target)
yhat_Albania yhat_lower_Albania yhat_upper_Albania y_Albania error_Albania uncertainity_Albania anomaly_Albania yhat_Algeria yhat_lower_Algeria yhat_upper_Algeria ... error_Zambia uncertainity_Zambia anomaly_Zambia yhat_Zimbabwe yhat_lower_Zimbabwe yhat_upper_Zimbabwe y_Zimbabwe error_Zimbabwe uncertainity_Zimbabwe anomaly_Zimbabwe
ds
2019-01-02 0.015897 0.010457 0.020970 0.01370 -0.002197 0.010513 No 0.016906 0.012021 0.021855 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-03 0.016658 0.010948 0.021773 0.01400 -0.002658 0.010825 No 0.017569 0.013159 0.022104 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-04 0.014649 0.008923 0.020236 0.01560 0.000951 0.011313 No 0.018233 0.013440 0.022624 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-05 0.013366 0.007608 0.018657 0.01430 0.000934 0.011049 No 0.017287 0.012631 0.021875 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-06 0.012515 0.006961 0.017893 0.01520 0.002685 0.010932 No 0.015868 0.010860 0.020512 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2019-04-25 0.136371 0.131072 0.141746 0.01810 -0.118271 0.010674 Yes 0.033700 0.029029 0.038494 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-04-26 0.061246 0.055846 0.066670 0.01472 -0.046526 0.010824 Yes 0.014654 0.010004 0.019297 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-04-27 0.036963 0.031030 0.042960 0.01390 -0.023063 0.011929 Yes 0.012938 0.008098 0.017716 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-04-28 0.061386 0.056303 0.067058 0.01404 -0.047346 0.010755 Yes 0.024515 0.019830 0.029092 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-04-29 0.038137 0.032409 0.043645 0.01390 -0.024237 0.011236 Yes 0.012273 0.007858 0.016771 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

118 rows × 889 columns

In [31]:
target.columns
Out[31]:
Index(['yhat_Albania', 'yhat_lower_Albania', 'yhat_upper_Albania', 'y_Albania',
       'error_Albania', 'uncertainity_Albania', 'anomaly_Albania',
       'yhat_Algeria', 'yhat_lower_Algeria', 'yhat_upper_Algeria',
       ...
       'error_Zambia', 'uncertainity_Zambia', 'anomaly_Zambia',
       'yhat_Zimbabwe', 'yhat_lower_Zimbabwe', 'yhat_upper_Zimbabwe',
       'y_Zimbabwe', 'error_Zimbabwe', 'uncertainity_Zimbabwe',
       'anomaly_Zimbabwe'],
      dtype='object', length=889)
In [32]:
target = target.fillna(0)
In [33]:
# determining Severity of the anomaly by categorizing it in multiple categories 3 being the most severe.

# Function to map the severity level based on the devaition from the standard mean
#  Compute the rolling mean and rolling standard deviation(window is a week).
# Classify data with an error of 1.5,1.75 and 2 standard deviations as limits for low,medium and high anomalies. 
# (5% of data point would be identified anomalies based on this property)

def anomaly_detection(target, win):
    for country in country_vid_ctr.groups:
#         target.fillna(0,inplace = True)
        target['percentage_change_'+country] = ((target['y_'+country] - target['yhat_'+country]) / target['y_'+country]) * 100
        target['meanval_'+country] = target['error_'+country].rolling(window=win).mean()
        target['deviation_'+country] = target['error_'+country].rolling(window=win).std()
        target['-3s_'+country] = target['meanval_'+country] - (2 * target['deviation_'+country])
        target['3s_'+country] = target['meanval_'+country] + (2 * target['deviation_'+country])
        target['-2s_'+country] = target['meanval_'+country] - (1.75 * target['deviation_'+country])
        target['2s_'+country] = target['meanval_'+country] + (1.75 * target['deviation_'+country])
        target['-1s_'+country] = target['meanval_'+country] - (1.5 * target['deviation_'+country])
        target['1s_'+country] = target['meanval_'+country] + (1.5 * target['deviation_'+country])
        cut_list = target[['error_'+country, '-3s_'+country, '-2s_'+country, '-1s_'+country, 'meanval_'+country, '1s_'+country, '2s_'+country, '3s_'+country]]
        cut_values = cut_list.values
        cut_sort = np.sort(cut_values)
        target['impact_'+country] = [(lambda x: np.where(cut_sort == target['error_'+country][x])[1][0])(x) for x in
                               range(len(target['error_'+country]))]
        severity = {0: 3, 1: 2, 2: 1, 3: 0, 4: 0, 5: 1, 6: 2, 7: 3}
        region = {0: "NEGATIVE", 1: "NEGATIVE", 2: "NEGATIVE", 3: "NEGATIVE", 4: "POSITIVE", 5: "POSITIVE", 6: "POSITIVE",
              7: "POSITIVE"}
        target['color_'+country] =  target['impact_'+country].map(severity)
        target['region_'+country] = target['impact_'+country].map(region)
        target['anomaly_points_'+country] = np.where(target['color_'+country] == 3, target['error_'+country], np.nan)

    return target
anomaly_severity = anomaly_detection(target, 7)
# anomaly_severity
In [34]:
(0.145270 - 0.156967)*100/0.145270
Out[34]:
-8.051903352378318
In [35]:
target[['impact_India','color_India','anomaly_points_India','y_India','yhat_India','percentage_change_India']]
Out[35]:
impact_India color_India anomaly_points_India y_India yhat_India percentage_change_India
ds
2019-01-02 0 3 -0.001036 0.0132 0.014236 -7.844857
2019-01-03 0 3 -0.000591 0.0135 0.014091 -4.380766
2019-01-04 0 3 -0.000024 0.0140 0.014024 -0.168961
2019-01-05 0 3 0.000060 0.0137 0.013640 0.435472
2019-01-06 0 3 0.000016 0.0138 0.013784 0.112420
... ... ... ... ... ... ...
2019-04-25 4 0 NaN 0.0172 0.017966 -4.454280
2019-04-26 3 0 NaN 0.0147 0.016601 -12.931819
2019-04-27 4 0 NaN 0.0146 0.015973 -9.407365
2019-04-28 4 0 NaN 0.0152 0.016418 -8.015305
2019-04-29 3 0 NaN 0.0135 0.015127 -12.048261

118 rows × 6 columns

plotting the Results of United States

In [71]:
country_filter = 'United States'
def plot_anomaly(df,metric_name):
    dates = df.ds
    bool_array = (abs(df['anomaly_points_'+country_filter]) > 0)
    actuals = df["y_"+country_filter][-len(bool_array):]
    anomaly_points = bool_array * actuals
    anomaly_points[anomaly_points == 0] = np.nan
    color_map= {0: "aliceblue", 1: "yellow", 2: "orange", 3: "red"}
    table = go.Table(
    domain=dict(x=[0, 1],
                y=[0, 0.3]),
    columnwidth=[1, 2 ],
    header = dict(height = 20,
                  values = [['<b>Date</b>'],['<b>Actual Values </b>'],
                            ['<b>Predicted</b>'], ['<b>% Difference</b>'],['<b>Severity (0-3)</b>']],
                 font = dict(color=['rgb(45, 45, 45)'] * 5, size=14),
                  fill = dict(color='#d562be')),
    cells = dict(values = [df.round(3)[k].tolist() for k in ['ds', 'y_'+country_filter, 'yhat_'+country_filter,
                                                               'percentage_change_'+country_filter,'color_'+country_filter]],
                 line = dict(color='#506784'),
                 align = ['center'] * 5,
                 font = dict(color=['rgb(40, 40, 40)'] * 5, size=12),
                 suffix=[None] + [''] + [''] + ['%'] + [''],
                 height = 27,
                 fill=dict(color= 
                      [df['color_'+country_filter].map(color_map)],
                      )
    ))


    anomalies = go.Scatter(name="Anomaly",
                       x=dates,
                       xaxis='x1',
                       yaxis='y1',
                       y=df['anomaly_points_'+country_filter],
                       mode='markers',
                       marker = dict(color ='red',
                      size = 11,line = dict(
                                         color = "red",
                                         width = 2)))
    


    upper_bound = go.Scatter(hoverinfo="skip",
                         x=dates,
                         showlegend =False,
                         xaxis='x1',
                         yaxis='y1',
                         y=df['3s_'+country_filter],
                         marker=dict(color="#444"),
                         line=dict(
                             color=('rgb(23, 96, 167)'),
                             width=2,
                             dash='dash'),
                         fillcolor='rgba(68, 68, 68, 0.3)',
                         fill='tonexty')

    lower_bound = go.Scatter(name='Confidence Interval',
                          x=dates,
                         xaxis='x1',
                         yaxis='y1',
                          y=df['-3s_'+country_filter],
                          marker=dict(color="#444"),
                          line=dict(
                              color=('rgb(23, 96, 167)'),
                              width=2,
                              dash='dash'),
                          fillcolor='rgba(68, 68, 68, 0.3)',
                          fill='tonexty')

    Actuals = go.Scatter(name= 'Actuals',
                     x= dates,
                     y= df['y_'+country_filter],
                    xaxis='x2', yaxis='y2',
                     mode='lines',
                     marker=dict(size=12,
                                 line=dict(width=1),
                                 color="blue"))

    Predicted = go.Scatter(name= 'Predicted',
                     x= dates,
                     y= df['yhat_'+country_filter],
                    xaxis='x2', yaxis='y2',
                     mode='lines',
                     marker=dict(size=12,
                                 line=dict(width=1),
                                 color="orange"))



    # create plot for error...
    Error = go.Scatter(name="Error",
                   x=dates, y=df['error_'+country_filter],
                   xaxis='x1',
                   yaxis='y1',
                   mode='lines',
                   marker=dict(size=12,
                               line=dict(width=1),
                               color="red"),
                   text="Error")



    anomalies_map = go.Scatter(name = "anomaly actual",
                                   showlegend=False,
                                   x=dates,
                                   y=anomaly_points,
                                   mode='markers',
                                   xaxis='x2',
                                   yaxis='y2',
                                    marker = dict(color ="red",
                                  size = 11,
                                 line = dict(
                                     color = "red",
                                     width = 2)))

    Moving_average = go.Scatter(name="Moving Average",
                           x=dates,
                           y=df['meanval_'+country_filter],
                           mode='lines',
                           xaxis='x1',
                           yaxis='y1',
                           marker=dict(size=12,
                                       line=dict(width=1),
                                       color="green"),
                           text="Moving average")




    axis=dict(
    showline=True,
    zeroline=False,
    showgrid=True,
    mirror=True,
    ticklen=4,
    gridcolor='#ffffff',
    tickfont=dict(size=10))

    layout = dict(
    width=1000,
    height=865,
    autosize=False,
    title= metric_name,
    margin = dict(t=75),
    showlegend=True,
    xaxis1=dict(axis, **dict(domain=[0, 1], anchor='y1', showticklabels=True)),
    xaxis2=dict(axis, **dict(domain=[0, 1], anchor='y2', showticklabels=True)),
    yaxis1=dict(axis, **dict(domain=[2 * 0.21 + 0.20 + 0.09, 1], anchor='x1', hoverformat='.2f')),
    yaxis2=dict(axis, **dict(domain=[0.21 + 0.12, 2 * 0.31 + 0.02], anchor='x2', hoverformat='.2f')))






    fig = go.Figure(data = [table,anomalies,anomalies_map,
                        upper_bound,lower_bound,Actuals,Predicted,
                        Moving_average,Error], layout = layout)
   

    iplot(fig)
    pyplot.show()

anomaly_severity = anomaly_detection(target, 7)
anomaly_severity.reset_index(inplace=True)

# Since the rolling window is seven days so I am plotting the data after seven days to avoid false anomalies
end_date = anomaly_severity.ds.min() + datetime.timedelta(days=7)
plot_anomaly(anomaly_severity[anomaly_severity.ds >= end_date],"Anomaly of a Country")